# Simulate coalescent process with heterochronous sampling times

# Assumptions and modifications
# - performs runs across various sample numbers (data sizes)
# - deposits batch runs in a single folder
# - simulates a single N(t) trajectory
# - places samples uniformly across time

# Clean the workspace and console so the script starts from a blank session
# Close all connections that are currently open
closeAllConnections()
# Remove every (non-hidden) object from the current environment
rm(list=ls())
# Emit a form-feed character; NOTE(review): presumably meant to clear the
# RStudio console - confirm, other front ends may simply print the character
cat("\014")  
# Shut down all open graphics devices
graphics.off()

# Packages for phylodyn
library("sp")
library("devtools")
library("INLA")
library("spam")
library("ape")
library("phylodyn")

# Set working directory to source.
# `parent.frame(2)$ofile` is only populated when the script is run through
# source(); in any other case it is NULL and dirname() would fail. Fall back
# to the `--file=` argument supplied by Rscript and, failing that, keep the
# current working directory.
script_path <- parent.frame(2)$ofile
if (is.null(script_path)) {
  file_arg <- grep("^--file=", commandArgs(trailingOnly = FALSE), value = TRUE)
  if (length(file_arg) > 0) {
    # Rscript encodes spaces in the script path as "~+~"
    script_path <- gsub("~+~", " ", sub("^--file=", "", file_arg[1]),
                        fixed = TRUE)
  }
}
this.dir <- if (is.null(script_path)) getwd() else dirname(script_path)
# Make the path absolute BEFORE changing directory: a relative `this.dir`
# would otherwise be re-resolved against the new working directory when the
# output paths are built from it later on.
this.dir <- normalizePath(this.dir, winslash = "/", mustWork = TRUE)
setwd(this.dir)

# Write `val` as a bare comma-separated table (no header row, no row names).
#   val      - object handed to write.table()
#   name     - file name, e.g. "nc.csv"
#   pathname - destination prefix; it is concatenated verbatim with `name`,
#              so it has to carry its own trailing separator
tableWrite <- function(val, name, pathname) {
  # Full target = prefix immediately followed by the file name
  target <- paste0(c(pathname, name), collapse = "")
  write.table(val, file = target, sep = ",",
              row.names = FALSE, col.names = FALSE)
}

# Middling bottleneck: a piecewise-constant trajectory equal to 500
# everywhere except strictly between t = 15 and t = 40, where it is 20.
#   t - numeric vector of times
# Returns a numeric vector with one value per element of `t`.
bottle_traj <- function(t) {
  in_bottleneck <- t > 15 & t < 40
  pop_size <- numeric(length(t))
  pop_size[!in_bottleneck] <- 500
  pop_size[in_bottleneck] <- 20
  pop_size
}

# Boom-bust trajectory with a later changepoint and an offset: the value
# is offset + scale * exp(t - bust) up to and including `bust`, and
# offset + scale * exp(bust - t) after it, so it peaks at t = bust.
#   t      - numeric vector of times
#   bust   - time of the changepoint
#   scale  - height of the peak above the offset
#   offset - constant added to every value
# Returns a numeric vector with one value per element of `t`.
boom_traj <- function(t, bust = 20, scale = 1000, offset = 100) {
  before <- t <= bust
  after <- t > bust
  pop_size <- numeric(length(t))
  pop_size[before] <- offset + scale * exp(t[before] - bust)
  pop_size[after] <- offset + scale * exp(bust - t[after])
  pop_size
}

# Main code for heterochronous simulations ----------------------------------------------------------

# Choose trajectory case: an integer index into trajNames
trajCase <- 1
trajNames <- c("cyclicSamps", "bottleSamps", "boomSamps", "steepSamps")

# Fail early on an invalid choice: switch() silently yields NULL for an
# out-of-range index, which would only surface later as an obscure error
if (!is.numeric(trajCase) || length(trajCase) != 1 ||
    !(trajCase %in% seq_along(trajNames))) {
  stop("trajCase must be a single integer between 1 and ", length(trajNames),
       call. = FALSE)
}

# Choose trajectory type. With a numeric selector switch() picks the
# alternative by position, so the alternatives are listed in the same order
# as trajNames. bottle_traj and boom_traj are defined in this script;
# cyclic_traj and steep_cyc_traj are not - presumably provided by phylodyn.
trajType <- switch(trajCase,
                   cyclic_traj,
                   bottle_traj,
                   boom_traj,
                   steep_cyc_traj
)
traj <- trajType
trajVal <- trajNames[trajCase]


# Range of sample numbers to loop across (one simulation run per entry)
nSamps <- seq(201, 1001, 50)
numRuns <- length(nSamps)

# Uniform sampling across time: ndivs equally spaced sampling times
# between 0 and all_samp_end
all_samp_end <- 48
ndivs <- 100

# Create folder for traj specific results
trajName <- paste0(trajVal, "_", numRuns)
# The folder usually exists already when the script is rerun, so suppress
# that warning and check explicitly that the folder is really there
dir.create(file.path(this.dir, trajName), showWarnings = FALSE)
if (!dir.exists(file.path(this.dir, trajName))) {
  stop("Could not create output folder: ", file.path(this.dir, trajName),
       call. = FALSE)
}
# Path prefix (with trailing separator) expected by tableWrite
pathf <- paste0(this.dir, "/", trajName, "/")

# Coalescent events and max time for each trajectory
nc <- rep(0, numRuns)
tmax <- rep(0, numRuns)

# Sampling times do not depend on the run, so build them once
samp_times <- seq(0, all_samp_end, length.out = ndivs)

# seq_len() keeps the loop empty when numRuns is 0 (1:0 would iterate twice)
for (i in seq_len(numRuns)) {
  # Total number of samples in this run
  nsamps <- nSamps[i]

  # Samples introduced at each sampling time: an equal share per time, with
  # the remainder added to the last time so the counts sum to nsamps
  perDiv <- floor(nsamps / ndivs)
  samps <- c(rep(perDiv, ndivs - 1), nsamps - (ndivs - 1) * perDiv)

  # Simulate genealogy and get all times
  gene <- coalsim(samp_times = samp_times, n_sampled = samps, traj = traj,
                  lower_bound = 10, method = "thin")
  coal_times <- gene$coal_times
  coalLin <- gene$lineages

  # TMRCA and no. coalescent events
  tmax[i] <- max(coal_times)
  nc[i] <- length(coal_times)

  # Export trajectory specific data for Matlab
  tableWrite(coal_times, paste0("coaltimes", i, ".csv"), pathf)
  tableWrite(samp_times, paste0("samptimes", i, ".csv"), pathf)
  tableWrite(coalLin, paste0("coalLin", i, ".csv"), pathf)
  tableWrite(samps, paste0("sampIntro", i, ".csv"), pathf)
}

# No. coalescences, samples and TMRCA (one entry per run)
tableWrite(nc, "nc.csv", pathf)
tableWrite(nSamps, "ns.csv", pathf)
tableWrite(tmax, "tmax.csv", pathf)

# True population size, evaluated on a grid of 20000 points that spans
# time 0 up to the largest TMRCA over all runs.
# `length.out` is spelled out instead of relying on partial matching.
t <- seq(0, max(tmax), length.out = 20000)
y <- traj(t)
tableWrite(t, "trajt.csv", pathf)
tableWrite(y, "trajy.csv", pathf)